import importlib.util
from pathlib import Path

import numpy as np
import pandas as pd


def load_shiftshare_module(root: Path):
    """Load the shift-share IV analysis script as a module object.

    The script's file name starts with a digit, so it cannot be brought in
    with a regular ``import`` statement; it is loaded from its file path
    instead.

    Args:
        root: Project root directory that contains the ``analysis`` folder.

    Returns:
        The executed module, created from a spec named ``"shiftshare"``.

    Raises:
        ImportError: If no import spec (or no loader) can be built for the
            script path.
    """
    path = root / "analysis" / "19_iv_shiftshare_rollout_absorb.py"
    spec = importlib.util.spec_from_file_location("shiftshare", path)
    # Validate with an explicit raise *before* the spec is used: an ``assert``
    # disappears under ``python -O``, and ``module_from_spec(None)`` would
    # otherwise fail with an unhelpful AttributeError.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {path}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod


def to_latex_tabular(df: pd.DataFrame) -> str:
    """Render the summary table as a LaTeX ``tabular`` with booktabs rules.

    Only the ten columns listed in the layout below are used; any other
    columns of ``df`` are ignored. Missing values in the numeric columns are
    rendered as empty cells. The ``label`` column is emitted via ``str()``
    without any further formatting.

    Args:
        df: One row per outcome, containing at least the layout's columns.

    Returns:
        The ``tabular`` environment as a string terminated by a newline.

    Raises:
        KeyError: If ``df`` lacks one of the required columns.
    """
    # One entry per output column, in display order:
    # (DataFrame column, header text, format spec or None to leave untouched).
    layout = [
        ("label", "Outcome", None),
        ("nobs", "N", ".0f"),
        ("coef_lowEdu", "b(LowEdu)", ".4f"),
        ("se_lowEdu", "se(LowEdu)", ".4f"),
        ("coef_highEdu", "b(HighEdu)", ".4f"),
        ("coef_deltaHighMinusLow", "Delta(High-Low)", ".4f"),
        ("se_deltaHighMinusLow", "se(Delta)", ".4f"),
        ("fsF_netusoft", "F(net)", ".2f"),
        ("fsF_netusoft_x_edu", "F(net$\\times$edu)", ".2f"),
        ("AR_pvalue", "AR p", ".4g"),
    ]
    columns = [name for name, _, _ in layout]
    titles = [title for _, title, _ in layout]

    def cell_formatter(spec):
        """Build a formatter that blanks missing values and applies ``spec``."""

        def render(value):
            if pd.notna(value):
                return format(value, spec)
            return ""

        return render

    table = df[columns].copy()
    for name, _, spec in layout:
        if spec is None:
            continue
        table[name] = table[name].map(cell_formatter(spec))

    body = [
        " & ".join(map(str, record)) + " \\\\"
        for record in table.itertuples(index=False, name=None)
    ]
    lines = [
        "\\begin{tabular}{lrrrrrrrrr}",
        "\\toprule",
        " & ".join(titles) + " \\\\",
        "\\midrule",
        *body,
        "\\bottomrule",
        "\\end{tabular}",
    ]
    return "\n".join(lines) + "\n"


def main() -> None:
    """Estimate the placebo/diagnostic IV specifications and write the tables.

    Loads the shift-share analysis script, builds its dataset, fits one
    specification per diagnostic outcome via the script's
    ``fit_iv_interaction_absorb`` and ``summarize_result`` functions, and
    writes the collected summaries to
    ``paper_joc/tables/iv_shiftshare_extended_placebos.tex`` and
    ``outputs/iv_shiftshare_extended_placebos.csv`` under the project root.

    Outcomes whose column is absent from the dataset, or whose estimation
    raises, are skipped (best-effort) and reported on stdout.

    Raises:
        RuntimeError: If none of the placebo outcomes could be estimated.
    """
    root = Path(__file__).resolve().parents[1]
    paper_tables = root / "paper_joc" / "tables"
    outputs = root / "outputs"
    # Both destinations must exist before writing; previously only the paper
    # tables directory was created, so the CSV write failed on a fresh checkout.
    for directory in (paper_tables, outputs):
        directory.mkdir(parents=True, exist_ok=True)

    mod = load_shiftshare_module(root)
    df = mod.build_dataset(root)

    controls = ["agea", "gndr", "hinctnta"]

    # Diagnostic outcomes that are plausibly slow-moving and not directly targeted by marginal increases
    # in general internet use frequency (included as exclusion diagnostics, not mechanisms).
    placebo_specs = [
        ("stfdem", "diagnostic: satisfaction with democracy"),
        ("trstplt", "diagnostic: trust in politicians"),
        ("trstprl", "diagnostic: trust in parliament"),
        ("trstprt", "diagnostic: trust in parties"),
        ("ppltrst", "diagnostic: general interpersonal trust"),
        ("uempla", "diagnostic: unemployed (1/0)"),
        ("wrkorg", "diagnostic: member of trade union/organization (1/0)"),
        ("wrkprty", "diagnostic: worked in party/union action (1/0)"),
    ]

    rows = []
    skipped = []
    for y, label in placebo_specs:
        if y not in df.columns:
            skipped.append((y, "column not in dataset"))
            continue
        try:
            res = mod.fit_iv_interaction_absorb(df, y, controls=controls)
        except Exception as exc:
            # Deliberately best-effort: one failing outcome must not abort the
            # whole table, but the failure is recorded instead of being hidden.
            skipped.append((y, f"estimation failed: {exc!r}"))
            continue
        rows.append(mod.summarize_result(res, y=y, label=label))

    for y, reason in skipped:
        print(f"Skipped: {y} ({reason})")

    tab = pd.DataFrame(rows)
    if tab.empty:
        raise RuntimeError("No placebo outcomes were estimated; check variable availability.")

    tex_path = paper_tables / "iv_shiftshare_extended_placebos.tex"
    csv_path = outputs / "iv_shiftshare_extended_placebos.csv"
    tex_path.write_text(to_latex_tabular(tab), encoding="utf-8")
    # Let pandas write the file itself so it controls the line terminators;
    # passing its CSV string through a text-mode write can double the carriage
    # returns on Windows.
    tab.to_csv(csv_path, index=False, encoding="utf-8")
    print(f"Wrote: {tex_path} rows={len(tab)}")


# Build the tables only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()

